# -*- coding: utf-8 -*-
import scrapy

from utils.cookies_get import make_cookie
import os
"""
1. 登录模拟
2. 去重
3. ip切换
4. 中间件
5. itemloader
6. aiomysql
7. 分布式爬虫
8. 爬虫部署
"""

class DoubanSpider(scrapy.Spider):
    """Spider that fetches Douban landing pages and saves the raw HTML to disk."""

    name = 'douban'
    # Scrapy's OffsiteMiddleware expects bare domain names here; a full URL
    # such as 'https://www.douban.com' never matches any request host, so
    # every request would be filtered as offsite.
    allowed_domains = ['douban.com']

    def start_requests(self):
        """Yield the initial requests with a desktop User-Agent.

        NOTE(review): `make_cookie` is imported at module level but never
        called — presumably it should populate `cookie`; verify against
        utils.cookies_get before relying on authenticated pages.
        """
        cookie = {}
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36'
        }
        urls = ["https://www.douban.com", "https://read.douban.com"]
        for url in urls:
            yield scrapy.Request(url=url, headers=headers, cookies=cookie, callback=self.parse)

    def parse(self, response):
        """Save the decoded response body to save_file/zhihu_index.html.

        NOTE(review): both start URLs share this callback and this fixed
        filename, so the second response overwrites the first — consider
        deriving the filename from the request URL.
        """
        # Public API: response.text decodes the body using the response's
        # declared encoding, instead of poking the private `_body` attribute.
        html = response.text
        base_dir = os.path.dirname(os.path.abspath(__file__))
        save_dir = os.path.join(base_dir, 'save_file')
        # Create the target directory up front so the first run does not
        # fail with FileNotFoundError.
        os.makedirs(save_dir, exist_ok=True)
        with open(os.path.join(save_dir, 'zhihu_index.html'), 'w', encoding='utf-8') as f:
            f.write(html)
